* This file takes datasets created in dofile 01-03, combines, and creates variables for analysis

set more off

* Take cleaned a&e data
u "$saveddata/data_ae2011", clear
append using "$saveddata/data_ae2012"
* Keep only major departments (24-hour consultant led facilities aka major hospital departments)
keep if aedepttype==1

* add inpatient data for those that are admitted (recorded same-day)
gen admidate_f = exitdate
merge m:1 extract_hes admidate_f admit using "$saveddata/data_inpat2012.dta", keepusing(spell_los number_proc cost dismeth_f tretspef) 
drop if _merge==2	// need to re-include these if we want 30-day measures of intensity
tab _merge admit if finyear==2012, nofreq col
rename _merge _merge2

merge m:1 extract_hes admidate_f admit using "$saveddata/data_inpat2011.dta", keepusing(spell_los number_proc cost dismeth_f tretspef) update // need to update - this was not updating, and so 2012 inpatient outcomes were missing //
drop if _merge==2	// need to re-include these if we want 30-day measures of intensity
tab _merge2 admit if finyear==2011, nofreq col
rename _merge _merge1

* merge checks and missing data variable
gen missing_inpat = admit & _merge1==1 & _merge2==1 	// admit and missing inpatient data
assert _merge1==1 & _merge2==1 if !admit		// non-admit and no inpatient data 
drop _merge*

* set intensity variables to zero for non-admits
foreach var of varlist spell_los number_proc cost {
	replace `var' = 0 if `var'==. & !admit
	}
gen missing_cost = cost_em==.
label var cost_em "Cost associated with initial inpatient admission"

* health outcome variables (incl. 2013 data for revisits/admits)
append using "$saveddata/data_ae2013", force
sort extract_hesid arrivaldate
by extract_hesid: gen daydiff = arrivaldate[_n+1] - arrivaldate
gen revisit30 = daydiff<=30
gen readmit30 = daydiff<=30 & admit[_n+1] 
gen revisit365 = daydiff<=365
gen readmit365 = daydiff<=365 & admit[_n+1]
drop if arrivaldate>=td(1apr2013)


gen death = aeattenddisp==10 | dismeth_f==4
replace death = . if missing_inpat		// cannot evaluate death if no inpatient data

* discharge variables
gen discharge_gp = aeattenddisp==2
gen discharge_no = aeattenddisp==3

* create bins
gen bin = 10 if depdur<=10
forv x = 20(10)800 {
	local y = `x' - 10
	replace bin = `x' if depdur>`y' & depdur<=`x'
	}
replace bin = 810 if depdur>800 & depdur!=.

* Create A&E and total costs
merge m:1 sushrg finyear using "$inputdata/ae_hrg.dta"

gen ae_cost = ae_tariff_usd
gen missing_ae_cost = 0
replace missing_ae_cost=1 if _merge==1
drop _merge

set more off
gen total_cost = ae_cost + cost_em

label var ae_cost "ER cost"
label var total_cost "Total cost of hospital treatment"

* predicted outcomes
gen dofw = dow(arrivaldate)
gen wofy = week(arrivaldate)


* Generate simple predictions of mortality and admission
reghdfe admit, absorb(i.d_age##i.d_male##i.d_ambulance, savefe)
predict d_admit, xbd

reghdfe death, absorb(i.d_age##i.d_male##i.d_ambulance, savefe)
predict d_death, xbd


* drop patients with missing information
gen sample = 1
foreach var in d_age d_ambulance d_male d_urban tcount icount admit discharge discharge_gp discharge_no spell_los cost_em number_proc revisit30 readmit30 death ae_cost total_cost{
replace sample = 0 if `var'==.
}

* Keep only those in the sample
keep if sample==1
drop sample


** Then add the 30-day costs here
gen ae = 1
rename cost_em cost_em_orig

*gen admidate_f = exitdate

set more off
* Here append all inpatient visits
foreach year in 2011 2012 2013{
append using "$saveddata/data_inpat`year'.dta"
}

sort extract admidate_f

forval x=1(1)20{
by extract_hesid: gen daydiff_inpat`x' = admidate_f[_n+`x'] - arrivaldate

gen costem30_`x' = cost_em[_n+`x'] if daydiff_inpat`x'<=30
gen costem365_`x' = cost_em[_n+`x'] if daydiff_inpat`x'<=365
}

keep if ae==1

egen costem30_all = rowtotal(costem30_*) if ae==1
egen costem365_all = rowtotal(costem365_*) if ae==1

* Need to make 30 day costs for A&E also
append using "$saveddata/data_ae2013", force

* Create A&E and total costs

** First merge in A&E tariff for 2013 (already in the 2011 and 2012 data)
merge m:1 sushrg finyear using "$inputdata/ae_hrg2013.dta", update

replace ae_cost = ae_tariff_gbp if finyear==2013
* Deflate and convert into dollars
replace ae_cost = ae_cost*1.060*1.351 if finyear==2013

set more off
sort extract arrivaldate

forval x=1(1)20{
by extract_hesid: gen daydiff`x' = arrivaldate[_n+`x'] - arrivaldate

gen aecost30_`x' = ae_cost[_n+`x'] if daydiff`x'<=30
gen aecost365_`x' = ae_cost[_n+`x'] if daydiff`x'<=365
}

drop if finyear==2013

gen aecost30_0 = ae_cost
egen costae30_all = rowtotal(aecost30_*)

gen cost30_aeem = costae30_all + costem30_all


drop daydiff*

gen combo = 0
replace combo = 1 if revisit30==1 | death==1

gen othexit = 0
replace othexit = 1 if admit==0 & discharge==0

cap drop *new

replace d1=40 if d1==.

* Build time info for breaking down wait times
gen middur = depdur - tretdur
gen interdur = tretdur - initdur

* Keep necessary variables
keep extract aeattenddisp tcount icount admit discharge* spell_los number_proc death othexit d_* costem30_all costae30_all cost30_aeem depdur concldur middur tretdur interdur initdur bin site_code dofw wofy arrivalhour arrivaldate d1

** Add demographic variables

merge m:1 extract using  "$saveddata/charl2010.dta"
drop if _merge==2
drop _merge

merge m:1 extract using  "$saveddata/data_2010visits_aeonly.dta"
drop if _merge==2
drop _merge

merge m:1 extract using "$saveddata/data_inpat2010_aeonly_jun20.dta"
drop if _merge==2
drop _merge


foreach var in n_em_admit2010 los2010 charlindex {
replace `var'=0 if `var'==.
}

* predict in-hospital mortality and admissions with all variables used in Table 2

cap drop d_death_hat 
cap drop d_admit_hat

reghdfe death, absorb(i.d_age##i.d_ambulance##i.d_male i.dofw##i.arrivalhour##i.wofy i.charlindex n_em_admit2010 los2010, savefe)
predict d_death_hat, xbd

reghdfe admit, absorb(i.d_age##i.d_ambulance##i.d_male i.dofw##i.arrivalhour##i.wofy i.charlindex n_em_admit2010 los2010, savefe)
predict d_admit_hat, xbd



****************************************************************************
* Merge in mortliaty data and create all-location mortality indicators here
****************************************************************************

gen encrypted_hesid = extract_hesid

merge m:1 encrypted_hesid using "M:\HES_Mortality_Data\HES-mortality/mortality_data"
drop if _merge==2
assert _merge!=2

rename dod dod_string

gen death_year = substr(dod_string,1,4)
gen death_month = substr(dod_string,6,2)
gen death_day = substr(dod_string,9,2)

destring death_year, replace
destring death_month, replace
destring death_day, replace

gen dod = mdy(death_month, death_day, death_year)

gen death_ever = 0
replace death_ever = 1 if _merge==3

gen death_gap = .

replace death_gap = dod - exitdate if death_ever==1

assert dod!=. if _merge==3

gen death30 = 0
replace death30 = 1 if death_gap>=0 & death_gap<=30 & dod!=.

gen death90 = 0
replace death90 = 1 if death_gap>=0 & death_gap<=90 & dod!=.

gen death365 = 0
replace death365 = 1 if death_gap>=0 & death_gap<=365 & dod!=.

rename death mort
label var mort "In-hospital 30-day mortality"
foreach xin 30 90 365{
	label var death`x' "All-location `x'-day mortality"

}

* Keep necessary variables
keep aeattenddisp tcount icount admit discharge* spell_los number_proc mort death30 death90 death365 othexit d_* costem30_all costae30_all cost30_aeem depdur concldur middur tretdur interdur initdur bin site_code charlindex n_em_admit2010 los2010 dofw wofy arrivalhour  arrivaldate cause_of_death mort d1
 
compress
save "$saveddata/data_complete_withcosts.dta", replace


* Create three definitions of diagnosis complexity
su mort
local tdeath = r(mean)

su depdur, det
local tdepdur = 220

su d_ambulance
local tambulance = r(mean)

replace d1 = 40 if d1==.
collapse (mean) mort d_ambulance (p80) depdur, by(d1)

rename mort death
rename d_ambulance ambulance
* create indicators
foreach i in death depdur ambulance {
	gen complex_`i' = `i'>`t`i''
	}

* compare indicators
tab complex_death complex_depdur
tab complex_death complex_ambulance
tab complex_depdur complex_ambulance

* save
compress
save "$saveddata/lookup_complexity.dta", replace



